15wk-1: Plotly – px를 활용한 시각화

plotly
Author

최규빈

Published

December 11, 2023

1. 강의영상

pass  # no-op placeholder; this cell has no other content

2. Imports

import numpy as np
import pandas as pd
#---#
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import plotly.subplots
# Make pandas' DataFrame.plot accessor dispatch to plotly.
pd.options.plotting.backend = "plotly"
# Use the "plotly_white" template as the default for figures.
pio.templates.default = "plotly_white"

3. 아이스크림을 많이 먹으면 걸리는 병

`-`항
2

A. 자료

# Seed NumPy's global RNG so the noise terms drawn below are reproducible.
# The three randn() draws consume the stream in order, so do not reorder them.
np.random.seed(42)
# Temperature series: the column at position 3 of temp.csv, as a NumPy array.
temp=pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/temp.csv').iloc[:,3].to_numpy()
sales = 20 + 2 * temp + np.random.randn(len(temp))*7 # temperature -> ice cream sales
diss1 = 30 + 0.5 * temp + np.random.randn(len(temp))*1 # temperature -> polio response level
diss2 = 30 + 0.15 * sales + np.random.randn(len(temp))*1 # ice cream sales -> polio response level
# df1: disease level generated from temperature (diss1).
df1 = pd.DataFrame({'temp':temp,'diss':diss1,'sales':sales})
# df2: disease level generated from sales (diss2).
df2 = pd.DataFrame({'temp':temp,'diss':diss2,'sales':sales})

B. Scatter + Line

# Scatter of disease level against ice cream sales for df1, with a fitted
# trendline ('ols' here; 'lowess' is the alternative noted for this cell).
df1.plot(
    kind='scatter',
    x='sales',
    y='diss',
    trendline='ols',
    title='world1: temp --> (diss,sales)',
)
# Same scatter for df1, but coloured by temperature decile so that a separate
# OLS trendline is fitted within each temperature bin.
(
    df1
    .assign(temp_cut=pd.qcut(df1.temp, 10))
    .sort_values('temp_cut')
    .plot(
        kind='scatter',
        x='sales',
        y='diss',
        color='temp_cut',
        trendline='ols',
        title='world1: temp --> (diss,sales)',
    )
)
# Scatter for df2, coloured by temperature decile so that a separate OLS
# trendline is fitted within each temperature bin.
(
    df2
    .assign(temp_cut=pd.qcut(df2.temp, 10))
    .sort_values('temp_cut')
    .plot(
        kind='scatter',
        x='sales',
        y='diss',
        color='temp_cut',
        trendline='ols',
        title='world2: temp --> sales --> diss',
    )
)

NYC

# Load the NYC taxi sample and derive the columns used for plotting.
df = pd.read_csv("https://raw.githubusercontent.com/guebin/DV2023/main/posts/NYCTaxi.csv")
df = df.assign(
    log_trip_duration=np.log(df.trip_duration),
    # Parse each timestamp column in one vectorised call instead of calling
    # pd.to_datetime once per row through Series.apply.
    pickup_datetime=pd.to_datetime(df.pickup_datetime),
    dropoff_datetime=pd.to_datetime(df.dropoff_datetime),
    # Straight-line distance computed directly on the latitude/longitude values.
    dist=np.sqrt(
        (df.pickup_latitude - df.dropoff_latitude) ** 2
        + (df.pickup_longitude - df.dropoff_longitude) ** 2
    ),
    #---#
    # Recode the numeric vendor ids 1/2 as the labels 'A'/'B'.
    vendor_id=df.vendor_id.map({1: 'A', 2: 'B'}),
).assign(
    # These depend on columns created or converted in the first assign().
    speed=lambda d: d.dist / d.trip_duration,
    pickup_hour=lambda d: d.pickup_datetime.dt.hour,
    dropoff_hour=lambda d: d.dropoff_datetime.dt.hour,
    dayofweek=lambda d: d.pickup_datetime.dt.dayofweek,
)
df
id vendor_id pickup_datetime dropoff_datetime passenger_count pickup_longitude pickup_latitude dropoff_longitude dropoff_latitude store_and_fwd_flag trip_duration log_trip_duration dist speed pickup_hour dropoff_hour dayofweek
0 id2875421 B 2016-03-14 17:24:55 2016-03-14 17:32:30 1 -73.982155 40.767937 -73.964630 40.765602 N 455 6.120297 0.017680 0.000039 17 17 0
1 id3194108 A 2016-06-01 11:48:41 2016-06-01 12:19:07 1 -74.005028 40.746452 -73.972008 40.745781 N 1826 7.509883 0.033027 0.000018 11 12 2
2 id3564028 A 2016-01-02 01:16:42 2016-01-02 01:19:56 1 -73.954132 40.774784 -73.947418 40.779633 N 194 5.267858 0.008282 0.000043 1 1 5
3 id1660823 B 2016-03-01 06:40:18 2016-03-01 07:01:37 5 -73.982140 40.775326 -74.009850 40.721699 N 1279 7.153834 0.060363 0.000047 6 7 1
4 id1575277 B 2016-06-11 16:59:15 2016-06-11 17:33:27 1 -73.999229 40.722881 -73.982880 40.778297 N 2052 7.626570 0.057778 0.000028 16 17 5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
14582 id3647353 A 2016-05-16 22:12:09 2016-05-16 22:27:46 1 -73.990219 40.737076 -73.986748 40.702194 N 937 6.842683 0.035054 0.000037 22 22 0
14583 id2064944 A 2016-05-23 08:04:35 2016-05-23 08:19:20 1 -73.987068 40.730728 -73.974983 40.751331 N 885 6.785588 0.023886 0.000027 8 8 0
14584 id3286731 B 2016-05-31 16:56:13 2016-05-31 17:38:44 1 -73.863541 40.769711 -73.994644 40.750435 N 2551 7.844241 0.132513 0.000052 16 17 1
14585 id3453691 B 2016-03-07 18:11:54 2016-03-07 18:29:09 1 -74.006531 40.738232 -73.985970 40.726978 N 1035 6.942157 0.023439 0.000023 18 18 0
14586 id0995846 B 2016-05-09 17:26:56 2016-05-09 18:30:37 2 -73.789543 40.647099 -73.960320 40.798180 N 3821 8.248267 0.228013 0.000060 17 18 0

14587 rows × 17 columns

# Display the column labels of df.
df.columns
Index(['id', 'vendor_id', 'pickup_datetime', 'dropoff_datetime',
       'passenger_count', 'pickup_longitude', 'pickup_latitude',
       'dropoff_longitude', 'dropoff_latitude', 'store_and_fwd_flag',
       'trip_duration', 'log_trip_duration', 'dist', 'speed', 'pickup_hour',
       'dropoff_hour', 'dayofweek'],
      dtype='object')
# Parallel-coordinates view of the taxi data: one axis per listed column,
# with each line coloured by its speed value.
tidydata = df
fig = px.parallel_coordinates(
    tidydata,
    dimensions=['dist', 'speed'],
    color='speed',
)
fig

C.

# df1 with binned copies of each variable: temperature deciles labelled
# '0'..'9', sales quartiles, and disease-level deciles.
big = df1.assign(
    temp_cut=pd.qcut(df1.temp, 10, labels=list('0123456789')),
    sales_cut=pd.qcut(df1.sales, 4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
    diss_cut=pd.qcut(df1.diss, 10),
)
# Mean disease level per disease bin; used as the line colour below.
small = big.groupby('diss_cut').agg(diss_mean=('diss', 'mean')).reset_index()
# Attach the bin means and keep only the rows in temperature bin '8'.
tidydata = big.merge(small).query("temp_cut == '8'")
#---#
fig = px.parallel_coordinates(
    tidydata,
    dimensions=['sales', 'diss', 'temp'],
    color='diss_mean',
)
fig
# df2 with binned copies of each variable: temperature deciles labelled
# '0'..'9', sales quartiles, and disease-level deciles.
big = df2.assign(
    temp_cut=pd.qcut(df2.temp, 10, labels=list('0123456789')),
    sales_cut=pd.qcut(df2.sales, 4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
    diss_cut=pd.qcut(df2.diss, 10),
)
# Mean disease level per disease bin; used as the line colour below.
small = big.groupby('diss_cut').agg(diss_mean=('diss', 'mean')).reset_index()
# Attach the bin means and keep only the rows in temperature bin '8'.
tidydata = big.merge(small).query("temp_cut == '8'")
#---#
fig = px.parallel_coordinates(
    tidydata,
    dimensions=['sales', 'temp', 'diss'],
    color='diss_mean',
)
fig
# df2 with disease level cut into quartile intervals, ordered by that bin.
big = df2.assign(diss_cut=pd.qcut(df2.diss, 4)).sort_values('diss_cut')
# Mean disease level per quartile; used as the line colour below.
small = big.groupby('diss_cut').agg(diss_mean=('diss', 'mean')).reset_index()
tidydata = big.merge(small)
#---#
fig = px.parallel_coordinates(
    tidydata,
    dimensions=['sales', 'temp', 'diss'],
    color='diss_mean',
)
fig
# df1 with every variable cut into labelled quartiles.
big = df1.assign(
    temp_cut=pd.qcut(df1.temp, 4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
    sales_cut=pd.qcut(df1.sales, 4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
    diss_cut=pd.qcut(df1.diss, 4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
)
# Mean disease level per disease quartile; used as the ribbon colour below.
small = big.groupby('diss_cut').agg(diss_mean=('diss', 'mean')).reset_index()
# Attach the quartile means and keep only the lowest temperature quartile.
tidydata = big.merge(small).query("temp_cut == 'Q1'")
#---#
fig = px.parallel_categories(
    tidydata,
    dimensions=['sales_cut', 'diss_cut'],
    color='diss_mean',
)
fig
# df2 with every variable cut into labelled quartiles.
big = df2.assign(
    temp_cut=pd.qcut(df2.temp, 4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
    sales_cut=pd.qcut(df2.sales, 4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
    diss_cut=pd.qcut(df2.diss, 4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
)
# Mean temperature per temperature quartile; used as the ribbon colour below.
small = big.groupby('temp_cut').agg(temp_mean=('temp', 'mean')).reset_index()
# Attach the quartile means and keep only the lowest temperature quartile.
tidydata = big.merge(small).query("temp_cut == 'Q1'")
#---#
fig = px.parallel_categories(
    tidydata,
    dimensions=['sales_cut', 'temp_cut', 'diss_cut'],
    color='temp_mean',
)
fig
tidydata
temp diss sales temp_cut sales_cut diss_cut temp_mean
0 -0.5 28.513929 22.476999 Q1 Q1 Q1 2.533537
1 1.4 34.175875 21.832150 Q1 Q1 Q1 2.533537
2 2.6 37.998085 29.733820 Q1 Q1 Q1 2.533537
3 2.0 35.855640 34.661209 Q1 Q1 Q1 2.533537
4 2.5 36.926338 23.360926 Q1 Q1 Q1 2.533537
... ... ... ... ... ... ... ...
651 24.5 103.209725 80.683151 Q4 Q4 Q4 25.965625
652 25.2 105.867392 68.582861 Q4 Q4 Q4 25.965625
653 23.6 101.121698 56.677999 Q4 Q3 Q4 25.965625
654 24.0 101.244255 67.619936 Q4 Q4 Q4 25.965625
655 24.1 102.836510 66.583458 Q4 Q4 Q4 25.965625

656 rows × 7 columns

# NOTE(review): this cell previously referenced `disease` and `icecream_sales`,
# which are not defined anywhere in this file, so it failed with NameError.
# They are presumably older names for the simulated series; `diss1` and
# `sales` are substituted here -- use `diss2` instead if the other simulated
# world was intended. Confirm with the author.
tidydata = pd.DataFrame({'temp': temp, 'diss': diss1, 'sales': sales}).assign(
    # Cut each variable into labelled quartiles for the categorical axes.
    temp_cut=lambda d: pd.qcut(d.temp, q=4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
    diss_cut=lambda d: pd.qcut(d.diss, q=4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
    sales_cut=lambda d: pd.qcut(d.sales, q=4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
)
display(tidydata)
#---#
# Ribbons connect the quartile bins; colour follows the raw disease level.
px.parallel_categories(
    data_frame=tidydata,
    color='diss',
    dimensions=['temp_cut', 'diss_cut', 'sales_cut'],
)
NameError: name 'disease' is not defined

3. px.funnel

# Funnel chart: one bar per stage, sized by the corresponding number.
data = {
    'number': [39, 27.4, 20.6, 11, 2],
    'stage': [
        "Website visit",
        "Downloads",
        "Potential customers",
        "Requested price",
        "invoice sent",
    ],
}
fig = px.funnel(data, x='number', y='stage')
fig.show()
# Show the same data as a table.
pd.DataFrame(data)
number stage
0 39.0 Website visit
1 27.4 Downloads
2 20.6 Potential customers
3 11.0 Requested price
4 2.0 invoice sent

3. pie,

A. px.pie

# Load the FIFA 22 player table, keep only columns whose share of missing
# values is below 0.5, then drop every row that still has a missing value.
df = (
    pd.read_csv('https://raw.githubusercontent.com/guebin/DV2021/master/_notebooks/2021-10-25-FIFA22_official_data.csv')
    .loc[:, lambda d: d.isna().mean().lt(0.5)]
    .dropna()
)
# Continent label -> list of nationality strings as they are spelled in the
# 'Nationality' column. Each nationality must appear in exactly one list.
# 'Thailand' and 'Grenada' were previously listed under 'Africa'; they now sit
# under 'Asia' and 'North and Central America' respectively.
continent_mapping = {
    'Asia': ['Afghanistan', 'Japan', 'Macau', 'Chinese Taipei', 'Indonesia', 'Korea Republic', 'Kazakhstan', 'Kyrgyzstan', 'Bhutan', 'Philippines', 'Syria', 'China PR', 'Oman', 'Guam', 'Vietnam', 'Jordan', 'Palestine', 'Malaysia', 'Hong Kong', 'Korea DPR', 'Lebanon', 'Uzbekistan', 'India','Iraq', 'Iran', 'Saudi Arabia', 'United Arab Emirates','Australia', 'Thailand'],
    'Europe': ['Portugal', 'Germany', 'Belgium', 'Netherlands', 'Croatia', 'Spain', 'Austria', 'Italy', 'France', 'Serbia', 'England', 'Poland', 'Ukraine', 'Wales', 'Scotland', 'Czech Republic', 'Slovakia', 'Romania', 'Bosnia and Herzegovina', 'Republic of Ireland', 'Norway', 'Sweden', 'Bulgaria', 'Lithuania', 'Estonia', 'Latvia', 'Liechtenstein','Albania','Denmark','Finland','Greece','Hungary','Iceland','Luxembourg','Northern Ireland','Slovenia','Switzerland','Andorra','Azerbaijan','Belarus','Cyprus','Faroe Islands','Georgia','Kosovo','Malta','Moldova','Montenegro','North Macedonia','Armenia','Gibraltar','Russia','Turkey','Israel'],
    'South America': ['Uruguay', 'Argentina', 'Brazil', 'Chile', 'Colombia', 'Ecuador', 'Paraguay', 'Venezuela', 'Suriname', 'Bolivia','Peru','Guyana'],
    'Africa': ['Egypt', "Côte d'Ivoire", 'Senegal', 'Morocco', 'Ghana', 'Algeria', 'Guinea', 'Mali', 'Congo DR', 'Liberia', 'Cameroon', 'Tunisia', 'Comoros', 'Kenya', 'South Africa', 'Zimbabwe', 'Madagascar', 'Mozambique', 'Equatorial Guinea', 'Congo', 'Burundi', 'Togo', 'Sudan', 'Mauritania','Guinea Bissau','Libya','Nigeria','Zambia','Angola','Benin','Burkina Faso','Cape Verde Islands','Central African Republic','Chad','Eritrea','Gabon','Gambia','Mauritius','Namibia','Rwanda','Sierra Leone','South Sudan','São Tomé e Príncipe','Uganda','Niger'],
    'North and Central America': ['Antigua and Barbuda', 'Barbados', 'Belize', 'Bermuda', 'Canada', 'Costa Rica', 'Cuba', 'Curacao', 'Dominican Republic', 'El Salvador', 'Guatemala', 'Haiti', 'Honduras', 'Jamaica', 'Mexico', 'Montserrat', 'Panama', 'Puerto Rico', 'Saint Kitts and Nevis', 'Saint Lucia', 'Trinidad and Tobago', 'United States', 'Grenada'],
    'Oceania': ['New Zealand', 'Fiji', 'Papua New Guinea','New Caledonia'],
}
# Tag every player with a continent. The continent -> nations table is
# inverted into a nation -> continent dict and applied with Series.map, so
# each row is labelled from its own Nationality value. The former nested
# comprehension built the column positionally and could misalign or fail
# when a nationality matched zero or several lists. Unmapped nationalities
# now become NaN.
df.assign(
    Continent=lambda d: d['Nationality'].map(
        {nation: continent
         for continent, nations in continent_mapping.items()
         for nation in nations}
    )
)
ID Name Age Photo Nationality Flag Overall Potential Club Club Logo ... GKDiving GKHandling GKKicking GKPositioning GKReflexes Best Position Best Overall Rating Release Clause DefensiveAwareness Continent
0 212198 Bruno Fernandes 26 https://cdn.sofifa.com/players/212/198/22_60.png Portugal https://cdn.sofifa.com/flags/pt.png 88 89 Manchester United https://cdn.sofifa.com/teams/11/30.png ... 12.0 14.0 15.0 8.0 14.0 CAM 88.0 €206.9M 72.0 Europe
1 209658 L. Goretzka 26 https://cdn.sofifa.com/players/209/658/22_60.png Germany https://cdn.sofifa.com/flags/de.png 87 88 FC Bayern München https://cdn.sofifa.com/teams/21/30.png ... 13.0 8.0 15.0 11.0 9.0 CM 87.0 €160.4M 74.0 Europe
2 176580 L. Suárez 34 https://cdn.sofifa.com/players/176/580/22_60.png Uruguay https://cdn.sofifa.com/flags/uy.png 88 88 Atlético de Madrid https://cdn.sofifa.com/teams/240/30.png ... 27.0 25.0 31.0 33.0 37.0 ST 88.0 €91.2M 42.0 South America
3 192985 K. De Bruyne 30 https://cdn.sofifa.com/players/192/985/22_60.png Belgium https://cdn.sofifa.com/flags/be.png 91 91 Manchester City https://cdn.sofifa.com/teams/10/30.png ... 15.0 13.0 5.0 10.0 13.0 CM 91.0 €232.2M 68.0 Europe
4 224334 M. Acuña 29 https://cdn.sofifa.com/players/224/334/22_60.png Argentina https://cdn.sofifa.com/flags/ar.png 84 84 Sevilla FC https://cdn.sofifa.com/teams/481/30.png ... 8.0 14.0 13.0 13.0 14.0 LB 84.0 €77.7M 80.0 South America
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
16703 259718 F. Gebhardt 19 https://cdn.sofifa.com/players/259/718/22_60.png Germany https://cdn.sofifa.com/flags/de.png 52 66 FC Basel 1893 https://cdn.sofifa.com/teams/896/30.png ... 53.0 45.0 47.0 52.0 57.0 GK 52.0 €361K 6.0 Europe
16704 251433 B. Voll 20 https://cdn.sofifa.com/players/251/433/22_60.png Germany https://cdn.sofifa.com/flags/de.png 58 69 F.C. Hansa Rostock https://cdn.sofifa.com/teams/27/30.png ... 59.0 60.0 56.0 55.0 61.0 GK 58.0 €656K 5.0 Europe
16706 262846 �. Dobre 20 https://cdn.sofifa.com/players/262/846/22_60.png Romania https://cdn.sofifa.com/flags/ro.png 53 63 FC Academica Clinceni https://cdn.sofifa.com/teams/113391/30.png ... 57.0 52.0 53.0 48.0 58.0 GK 53.0 €279K 5.0 Europe
16707 241317 21 Xue Qinghao 19 https://cdn.sofifa.com/players/241/317/21_60.png China PR https://cdn.sofifa.com/flags/cn.png 47 60 Shanghai Shenhua FC https://cdn.sofifa.com/teams/110955/30.png ... 49.0 48.0 45.0 38.0 52.0 GK 47.0 €223K 21.0 Asia
16708 259646 A. Shaikh 18 https://cdn.sofifa.com/players/259/646/22_60.png India https://cdn.sofifa.com/flags/in.png 47 67 ATK Mohun Bagan FC https://cdn.sofifa.com/teams/113146/30.png ... 49.0 41.0 39.0 45.0 49.0 GK 47.0 €259K 7.0 Asia

14398 rows × 64 columns

# Count players per continent. Continent comes from an inverted
# nation -> continent lookup applied with Series.map, so rows are labelled by
# value rather than by list position; unmapped nationalities become NaN and
# are left out of the groupby.
tidydata = (
    df.assign(
        Continent=lambda d: d['Nationality'].map(
            {nation: continent
             for continent, nations in continent_mapping.items()
             for nation in nations}
        )
    )
    .groupby('Continent')
    .agg(Count=('ID', 'count'))
    .reset_index()
)
# One slice per continent, sized by player count.
px.pie(
    data_frame=tidydata,
    names='Continent',
    values='Count',
)
#tidydata

- pie chart with hole

# Count, per continent, the players whose Overall rating is above the mean.
# Continent comes from an inverted nation -> continent lookup applied with
# Series.map, so rows are labelled by value rather than by list position.
tidydata = (
    df.assign(
        Continent=lambda d: d['Nationality'].map(
            {nation: continent
             for continent, nations in continent_mapping.items()
             for nation in nations}
        )
    )
    .query('Overall > Overall.mean()')
    .groupby('Continent')
    .agg(Count=('ID', 'count'))
    .reset_index()
)
#---#
# `names` selects the column that labels the slices. The previous
# `labels='Continent'` was wrong: `labels` is px's dict for renaming columns
# in the figure, so the slices lost their continent names.
px.pie(
    data_frame=tidydata,
    names='Continent',
    values='Count',
    hole=0.3,
)
#tidydata

-

# Count, per continent, the players whose Overall rating is above the mean.
# Continent comes from an inverted nation -> continent lookup applied with
# Series.map, so rows are labelled by value rather than by list position.
tidydata = (
    df.assign(
        Continent=lambda d: d['Nationality'].map(
            {nation: continent
             for continent, nations in continent_mapping.items()
             for nation in nations}
        )
    )
    .query('Overall > Overall.mean()')
    .groupby('Continent')
    .agg(Count=('ID', 'count'))
    .reset_index()
)
#---#
# Same donut chart built with graph_objects; `pull` offsets the first slice.
go.Figure(
    data=go.Pie(
        labels=tidydata.Continent,
        values=tidydata.Count,
        pull=[0.2, 0, 0, 0, 0, 0],
        hole=0.3,
    )
)
# A list literal is already a list; wrapping it in list() was redundant.
a = [1, 2, 3]

B. px.sunburst

# Player count per nationality, then a continent label for each nationality.
# The label comes from an inverted nation -> continent lookup applied with
# Series.map, so rows are labelled by value rather than by list position.
tidydata = (
    df.groupby('Nationality')
    .size()
    .reset_index(name='Count')
    .assign(
        Continent=lambda d: d['Nationality'].map(
            {nation: continent
             for continent, nations in continent_mapping.items()
             for nation in nations}
        )
    )
)
#---#
# Two-level hierarchy: continents inside, nationalities outside.
px.sunburst(
    data_frame=tidydata,
    path=['Continent', 'Nationality'],
    values='Count',
)

C. px.treemap

# Player count per nationality, then a continent label for each nationality.
# The label comes from an inverted nation -> continent lookup applied with
# Series.map, so rows are labelled by value rather than by list position.
tidydata = (
    df.groupby('Nationality')
    .size()
    .reset_index(name='Count')
    .assign(
        Continent=lambda d: d['Nationality'].map(
            {nation: continent
             for continent, nations in continent_mapping.items()
             for nation in nations}
        )
    )
)
#---#
# Same hierarchy as nested rectangles: continents containing nationalities.
px.treemap(
    data_frame=tidydata,
    path=['Continent', 'Nationality'],
    values='Count',
)
# tidydata = df.groupby('Nationality').agg('size').reset_index().rename({0:'Count'},axis=1)\
# .assign(
#     Continent = lambda df: [k for x in df['Nationality'] for k,v in continent_mapping.items() if x in v]
# )
# #---# 
# px.icicle(
#     data_frame=tidydata, 
#     path=['Continent','Nationality',],
#     values='Count'
# )

4.

# Load the insurance dataset into df (replacing the previous df) and show it.
df = pd.read_csv('https://raw.githubusercontent.com/guebin/MP2023/main/posts/insurance.csv')
df
age sex bmi children smoker region charges
0 19 female 27.900 0 yes southwest 16884.92400
1 18 male 33.770 1 no southeast 1725.55230
2 28 male 33.000 3 no southeast 4449.46200
3 33 male 22.705 0 no northwest 21984.47061
4 32 male 28.880 0 no northwest 3866.85520
... ... ... ... ... ... ... ...
1333 50 male 30.970 3 no northwest 10600.54830
1334 18 female 31.920 0 no northeast 2205.98080
1335 18 female 36.850 0 no southeast 1629.83350
1336 21 female 25.800 0 no southwest 2007.94500
1337 61 female 29.070 0 yes northwest 29141.36030

1338 rows × 7 columns

# Insurance data with binned copies of the numeric columns: age and charges
# in labelled quartiles, bmi in three labelled bins.
_df = df.assign(
    age_cut=pd.qcut(df.age, q=4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
    bmi_cut=pd.qcut(df.bmi, q=3, labels=['low', 'midium', 'high']),
    charges_cut=pd.qcut(df.charges, q=4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
)

    
{'Q1': 2853.0894414925374,
 'Q2': 6991.862334038923,
 'Q3': 12106.074964071857,
 'Q4': 31108.444687432835}
# Display the binned insurance frame.
_df
age sex bmi children smoker region charges age_cut bmi_cut charges_cut
0 19 female 27.900 0 yes southwest 16884.92400 Q1 midium Q4
1 18 male 33.770 1 no southeast 1725.55230 Q1 high Q1
2 28 male 33.000 3 no southeast 4449.46200 Q2 midium Q1
3 33 male 22.705 0 no northwest 21984.47061 Q2 low Q4
4 32 male 28.880 0 no northwest 3866.85520 Q2 midium Q1
... ... ... ... ... ... ... ... ... ... ...
1333 50 male 30.970 3 no northwest 10600.54830 Q3 midium Q3
1334 18 female 31.920 0 no northeast 2205.98080 Q1 midium Q1
1335 18 female 36.850 0 no southeast 1629.83350 Q1 high Q1
1336 21 female 25.800 0 no southwest 2007.94500 Q1 low Q1
1337 61 female 29.070 0 yes northwest 29141.36030 Q4 midium Q4

1338 rows × 10 columns

# Binned insurance data plus charges_mean: the mean charge of the charges
# quartile each row falls in.
tidydata = df.assign(
    age_cut=pd.qcut(df.age, q=4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
    bmi_cut=pd.qcut(df.bmi, q=3, labels=['low', 'midium', 'high']),
    charges_cut=pd.qcut(df.charges, q=4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
).assign(
    # Build a {quartile label: mean charge} dict and look each row up in it.
    charges_mean=lambda d: d.charges_cut.map(
        d.groupby('charges_cut')['charges'].mean().to_dict()
    )
)
tidydata
age sex bmi children smoker region charges age_cut bmi_cut charges_cut charges_mean
0 19 female 27.900 0 yes southwest 16884.92400 Q1 midium Q4 31108.444687
1 18 male 33.770 1 no southeast 1725.55230 Q1 high Q1 2853.089441
2 28 male 33.000 3 no southeast 4449.46200 Q2 midium Q1 2853.089441
3 33 male 22.705 0 no northwest 21984.47061 Q2 low Q4 31108.444687
4 32 male 28.880 0 no northwest 3866.85520 Q2 midium Q1 2853.089441
... ... ... ... ... ... ... ... ... ... ... ...
1333 50 male 30.970 3 no northwest 10600.54830 Q3 midium Q3 12106.074964
1334 18 female 31.920 0 no northeast 2205.98080 Q1 midium Q1 2853.089441
1335 18 female 36.850 0 no southeast 1629.83350 Q1 high Q1 2853.089441
1336 21 female 25.800 0 no southwest 2007.94500 Q1 low Q1 2853.089441
1337 61 female 29.070 0 yes northwest 29141.36030 Q4 midium Q4 31108.444687

1338 rows × 11 columns

# Binned insurance data plus charges_mean: the mean charge of the charges
# quartile each row falls in.
tidydata = df.assign(
    age_cut=pd.qcut(df.age, q=4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
    bmi_cut=pd.qcut(df.bmi, q=3, labels=['low', 'midium', 'high']),
    charges_cut=pd.qcut(df.charges, q=4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
).assign(
    # Build a {quartile label: mean charge} dict and look each row up in it.
    charges_mean=lambda d: d.charges_cut.map(
        d.groupby('charges_cut')['charges'].mean().to_dict()
    )
)
#---#
# Ribbons across the categorical columns, coloured by the quartile mean charge.
px.parallel_categories(
    tidydata,
    dimensions=['smoker', 'age_cut', 'bmi_cut', 'region', 'children', 'charges_cut'],
    color='charges_mean',
)
# Binned insurance data plus charges_mean, restricted to non-smokers. The
# quartiles and means are computed on all rows before the filter is applied.
tidydata = df.assign(
    age_cut=pd.qcut(df.age, q=4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
    bmi_cut=pd.qcut(df.bmi, q=3, labels=['low', 'midium', 'high']),
    charges_cut=pd.qcut(df.charges, q=4, labels=['Q1', 'Q2', 'Q3', 'Q4']),
).assign(
    # Build a {quartile label: mean charge} dict and look each row up in it.
    charges_mean=lambda d: d.charges_cut.map(
        d.groupby('charges_cut')['charges'].mean().to_dict()
    )
).query("smoker == 'no'")
#---#
# Same ribbons as the full-data chart, without the smoker axis.
px.parallel_categories(
    tidydata,
    dimensions=['age_cut', 'bmi_cut', 'region', 'children', 'charges_cut'],
    color='charges_mean',
)
# NOTE(review): df was last loaded from insurance.csv, whose columns do not
# include SibSp, Parch, Sex, Pclass, Embarked or Survived. This cell
# presumably expects a Titanic-style passenger table in df -- confirm which
# dataset should be loaded first.
# Fsize is SibSp + Parch; Sex is recoded to 0 (female) / 1 (male).
tidydata = df.assign(
    Fsize=df['SibSp'] + df['Parch'],
    Sex=df['Sex'].map({'female': 0, 'male': 1}),
).sort_values('Fsize')
px.parallel_categories(
    tidydata,
    dimensions=['Fsize', 'Pclass', 'Embarked', 'Sex', 'Survived'],
    color='Survived',
)
# NOTE(review): df was last loaded from insurance.csv, whose columns do not
# include SibSp, Parch, Sex, Pclass, Embarked or Survived. This cell
# presumably expects a Titanic-style passenger table in df -- confirm which
# dataset should be loaded first.
# Fsize is SibSp + Parch; Sex is recoded to 0 (female) / 1 (male).
tidydata = df.assign(
    Fsize=df['SibSp'] + df['Parch'],
    Sex=df['Sex'].map({'female': 0, 'male': 1}),
)
px.parallel_categories(
    tidydata,
    dimensions=['Fsize', 'Pclass', 'Embarked', 'Sex', 'Survived'],
    color='Sex',
)
# Polar bar chart of plotly's bundled wind dataset: bar length is frequency,
# angle is direction, and colour is strength.
df = px.data.wind()
fig = px.bar_polar(
    data_frame=df,
    r="frequency",
    theta="direction",
    color="strength",
    color_discrete_sequence=px.colors.sequential.Plasma_r,
    template="plotly_dark",
)
fig.show()
import json
# `import urllib` alone does not guarantee that the `request` submodule is
# loaded, so import it explicitly.
import urllib.request

import plotly.graph_objects as go

url = 'https://raw.githubusercontent.com/plotly/plotly.js/master/test/image/mocks/sankey_energy.json'
# Read the figure specification and close the HTTP response afterwards.
with urllib.request.urlopen(url) as response:
    data = json.loads(response.read())

# Shorthand for the node and link sections of the first trace. These are the
# same dict objects as inside `data`, so the colour edits below still update it.
node_spec = data['data'][0]['node']
link_spec = data['data'][0]['link']

# override gray link colors with 'source' colors
opacity = 0.4
# change 'magenta' to its 'rgba' value to add opacity
node_spec['color'] = [
    'rgba(255,0,255, 0.8)' if color == "magenta" else color
    for color in node_spec['color']
]
# Each link takes its source node's colour, with "0.8" replaced by `opacity`.
link_spec['color'] = [
    node_spec['color'][src].replace("0.8", str(opacity))
    for src in link_spec['source']
]

fig = go.Figure(data=[go.Sankey(
    valueformat = ".0f",
    valuesuffix = "TWh",
    # Define nodes
    node = dict(
      pad = 15,
      thickness = 15,
      line = dict(color = "black", width = 0.5),
      label = node_spec['label'],
      color = node_spec['color']
    ),
    # Add links
    link = dict(
      source = link_spec['source'],
      target = link_spec['target'],
      value = link_spec['value'],
      label = link_spec['label'],
      color = link_spec['color']
))])

fig.update_layout(title_text="Energy forecast for 2050<br>Source: Department of Energy & Climate Change, Tom Counsell via <a href='https://bost.ocks.org/mike/sankey/'>Mike Bostock</a>",
                  font_size=10)
fig.show()